## ------------------------------
## PA validation using ZDD method
## ------------------------------
library(spdep)
library(rgdal)
library(igraph)
library(redist)
library(entropy)
library(doMC)
source("../random_submap.R")

nc <- as.numeric(Sys.getenv("OMP_NUM_THREADS"))

## Params
params <- expand.grid(nprecs = 30,
                      ndists = 2,
                      ntests = 1:100)

t <- as.numeric(Sys.getenv("SLURM_ARRAY_TASK_ID"))

nprecs  <- params$nprecs[t]
ndists  <- params$ndists[t]
nsims <- 10000

ks_pval <- function(stat_truth, stat_sim){
    stat_truth <- stat_truth[!is.na(stat_truth)]
    stat_sim <- stat_sim[!is.na(stat_sim)]    
    return(ks.test(stat_truth, stat_sim)$p.value)
}

## ----------------------
## Create sample data set
## ----------------------

## Load shape file, convert to adjacency list
map <- readOGR(path.expand("../../data/fl"), "FL")
map@data$pop <- as.numeric(as.character(map@data$pop))
map@data$mccain <- as.numeric(as.character(map@data$mccain))

## Sample a random sub-map
map_sub <- random_submap(map, nprecs)

## Create directory
dir.create(paste0("../../data/enumpart_out_", nprecs, "_", ndists))
savedir <- paste0("../../data/enumpart_out_", nprecs, "_", ndists)

## Save in proper format for enumpart
adjlist <- poly2nb(map_sub, queen = FALSE)
sink(paste0(savedir, "/map_sub_", t, ".dat"))
for(i in 1:length(adjlist)){
    sub <- adjlist[[i]]
    sub <- sub[sub > i]
    if(length(sub) > 0){
        for(j in 1:length(sub)){
            cat(paste(i, sub[j], collapse = " "))
            cat("\n")
        }
    }
}
sink()

## Reorder edges - this step reorders the edges of edgelist to make
## ZDD construction more efficient
system(paste0("python ../ndscut.py <", savedir, "/map_sub_", t, ".dat >", savedir, "/map_sub_ordered_", t, ".dat"), wait = TRUE)
system(paste0("mv ", savedir, "/map_sub_ordered_", t, ".dat ", savedir, "/map_sub_", t, ".dat"), wait = TRUE)

## Run enumpart
cat("Run enumpart.\n")
system(paste0("../../enumpart_private/enumpart ", savedir, "/map_sub_", t, ".dat -k ", ndists, " -allsols > ", savedir, "/map_sub_", t, "_solutions.dat"), wait = TRUE)
cat("Finished running enumpart.\n")

## Load edgelist
el <- strsplit(readLines(paste0(savedir, "/map_sub_", t, ".dat")),
               split = " ")
el <- apply(do.call(rbind, el), 2, as.numeric)

## Read in solutions and clean to numeric
sols <- readLines(paste0(savedir, "/map_sub_", t, "_solutions.dat"))
sols_out <- lapply(1:length(sols), function(x){
    sol_split <- as.numeric(strsplit(sols[x], split = " ")[[1]])
    el_sub <- el[sol_split,]
    comps_out <- components(graph_from_edgelist(el_sub, directed = FALSE))
    if(comps_out$no < ndists){
        max_num <- max(comps_out$membership)
        comps_out <- c(comps_out$membership, (max_num + 1):ndists)
    }else{
        comps_out <- comps_out$membership
    }
    return(comps_out)
})
sols_out <- do.call(cbind, sols_out)

## Calculate distance from parity
parity_dist <- apply(sols_out, 2, function(x){
    df <- data.frame(dists = x, pop = map_sub@data$pop)
    mean_pop <- sum(df$pop) / length(unique(x))
    tab <- tapply(df$pop, df$dists, sum)
    return(max(abs(tab - mean_pop) / mean_pop))
})

## Calculate efficiency gap and dem segregation
seg <- redist.segcalc(
    algout = sols_out,
    grouppop = map_sub@data$mccain,
    fullpop = map_sub@data$pop
)

## ----------------
## Run redist.rsg()
## ----------------
cat("Run RSG simulations.\n")
adjlist_zind <- lapply(adjlist, function(x){x - 1})

registerDoMC(nc)
out_rsg <- foreach(i = 1:nsims, .packages = "redist") %dopar% {

    ## Unconstrained
    repeat{
        rsg_out_unc <- try(
            redist.rsg(adjlist_zind, population = map_sub@data$pop,
                       ndists = ndists, thresh = 100, verbose = FALSE),
            silent = TRUE
        )
        if(!inherits(rsg_out_unc, "try-error")){
            break
        }
    }

    ## 20%
    repeat{
        rsg_out_20h <- try(
            redist.rsg(adjlist_zind, population = map_sub@data$pop,
                       ndists = ndists, thresh = .2, verbose = FALSE),
            silent = TRUE
        )
        if(!inherits(rsg_out_20h, "try-error")){
            break
        }
    }

    ## 10%
    repeat{
        rsg_out_10h <- try(
            redist.rsg(adjlist_zind, population = map_sub@data$pop,
                       ndists = ndists, thresh = .1, verbose = FALSE),
            silent = TRUE
        )
        if(!inherits(rsg_out_10h, "try-error")){
            break
        }
    }

    return(list(rsg_out_unc$district_membership,
                rsg_out_20h$district_membership,
                rsg_out_10h$district_membership))
    
    if(i %% (nsims / 10) == 0){
        cat(paste0(i/nsims*100, "% done.\n"))
    }
    
}

## Unpack
rsg_out_unc <- matrix(NA, nprecs, nsims)
rsg_out_20h <- matrix(NA, nprecs, nsims)
rsg_out_10h <- matrix(NA, nprecs, nsims)
for(i in 1:length(out_rsg)){
    rsg_out_unc[,i] <- out_rsg[[i]][[1]]
    rsg_out_20h[,i] <- out_rsg[[i]][[2]]
    rsg_out_10h[,i] <- out_rsg[[i]][[3]]
}

## Calculate segregation
seg_rsg_unc <- redist.segcalc(
    algout = rsg_out_unc,
    grouppop = map_sub@data$mccain,
    fullpop = map_sub@data$pop
)
seg_rsg_20h <- redist.segcalc(
    algout = rsg_out_20h,
    grouppop = map_sub@data$mccain,
    fullpop = map_sub@data$pop
)
seg_rsg_10h <- redist.segcalc(
    algout = rsg_out_10h,
    grouppop = map_sub@data$mccain,
    fullpop = map_sub@data$pop
)

## Dem segregation distribution measures
ks_seg_rsg_unc <- ks_pval(seg, seg_rsg_unc)
ks_seg_rsg_20 <- ks_pval(seg[parity_dist <= .2], seg_rsg_20h)
ks_seg_rsg_10 <- ks_pval(seg[parity_dist <= .1], seg_rsg_10h)

## -------------------
## Run MCMC algorithms
## -------------------
## Unconstrained
mcmc_out_unc <- redist.mcmc(adjlist_zind, popvec = map_sub@data$pop,
                            eprob = 0, nsims = nsims, ndists = ndists)
seg_mcmc_unc <- redist.segcalc(
    algout = mcmc_out_unc,
    grouppop = map_sub@data$mccain,
    fullpop = map_sub@data$pop
)
ks_seg_mcmc_unc <- ks_pval(seg, seg_mcmc_unc)

## 20%
params <- expand.grid(beta_target = seq(5, 50, by = 5),
                      beta_weight = seq(2, 10, by = 1))
out_mcmc_20t <- foreach(i = 1:nrow(params), .packages = "redist", .combine = rbind) %dopar% {

    ## Run alg
    betaweights <- rep(NA, 10)
    for(j in 1:10){
        betaweights[j] <- params$beta_weight[i]^j
    }
    mcmc_out_20t <- redist.mcmc(
        adjlist_zind,
        popvec = map_sub@data$pop,
        nsims = nsims,
        ndists = ndists,
        eprob = 0,
        constraint = "population",
        constraintweights = params$beta_target[i],
        temper = TRUE,
        betaweights = betaweights
    )
    beta_tab <- abs(table(mcmc_out_20t$beta_sequence) -
                    nsims / length(betaweights))
    beta_dist <- (length(betaweights) - length(beta_tab)) *
        (nsims / length(betaweights)) + sum(beta_tab)
    mh_accept <- mean(mcmc_out_20t$mhdecisions)

    ## Reweight
    mcmc_out_20t <- redist.ipw(mcmc_out_20t, targetpop = .2)

    ## Calc seg, eg
    seg_mcmc_20t <- redist.segcalc(
        algout = mcmc_out_20t,
        grouppop = map_sub@data$mccain,
        fullpop = map_sub@data$pop
    )

    ## Calculate stats
    ks_seg_mcmc_20 <- ks_pval(seg[parity_dist <= .2], seg_mcmc_20t)

    ## Output df
    out <- data.frame(ks_seg = ks_seg_mcmc_20,
                      ar_mcmc = mh_accept,
                      beta_dist = beta_dist)

    return(out)
    
}
out_20 <- out_mcmc_20t[out_mcmc_20t$ar_mcmc <= .4 & out_mcmc_20t$ar_mcmc >= .2,]
best_param <- which.min(out_20$beta_dist)
ks_seg_mcmc_20 <- out_20$ks_seg[best_param]

## 10%
out_mcmc_10t <- foreach(i = 1:nrow(params), .packages = "redist", .combine = rbind) %dopar% {

    ## Run alg
    betaweights <- rep(NA, 10)
    for(j in 1:10){
        betaweights[j] <- params$beta_weight[i]^j
    }
    mcmc_out_10t <- redist.mcmc(
        adjlist_zind,
        popvec = map_sub@data$pop,
        nsims = nsims,
        ndists = ndists,
        eprob = 0,
        constraint = "population",
        constraintweights = params$beta_target[i],
        temper = TRUE,
        betaweights = betaweights
    )
    beta_tab <- abs(table(mcmc_out_10t$beta_sequence) -
                    nsims / length(betaweights))
    beta_dist <- (length(betaweights) - length(beta_tab)) *
        (nsims / length(betaweights)) + sum(beta_tab)
    mh_accept <- mean(mcmc_out_10t$mhdecisions)

    ## Reweight
    mcmc_out_10t <- redist.ipw(mcmc_out_10t, targetpop = .1)

    ## Calc seg, eg
    seg_mcmc_10t <- redist.segcalc(
        algout = mcmc_out_10t,
        grouppop = map_sub@data$mccain,
        fullpop = map_sub@data$pop
    )

    ## Calculate stats
    ks_seg_mcmc_10 <- ks_pval(seg[parity_dist <= .1], seg_mcmc_10t)

    ## Output df
    out <- data.frame(ks_seg = ks_seg_mcmc_10,
                      ar_mcmc = mh_accept,
                      beta_dist = beta_dist)

    return(out)
    
}
out_10 <- out_mcmc_10t[out_mcmc_10t$ar_mcmc <= .4 & out_mcmc_10t$ar_mcmc >= .2,]
best_param <- which.min(out_10$beta_dist)
ks_seg_mcmc_10 <- out_10$ks_seg[best_param]

## -------------
## Create output
## -------------

## Return data frame
out <- data.frame(val = c(ks_seg_mcmc_unc, ks_seg_mcmc_20, ks_seg_mcmc_10,
                          ks_seg_rsg_unc, ks_seg_rsg_20, ks_seg_rsg_10),
                  popcons = rep(c("Unconstrained", "20%", "10%"), 2),
                  algorithm = c(rep("MCMC", 3), rep("RSG", 3)),
                  stat = rep("KS P-Value", 6),
                  measure = rep("Segregation Index", 6),
                  trial = t)
map_rownames <- rownames(map_sub@data)

out_list <- list(stats = out, map_rownames = map_rownames, par_true = parity_dist, plans_lt_1pct = sum(parity_dist <= 0.01), pop_sd = sd(map_sub@data$pop))

save(out_list, file = paste0(savedir, "/sim_", t, ".RData"))     
